package service;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.ThreadLocalRandom;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import javax.swing.JFrame;
import javax.swing.JOptionPane;
import javax.swing.SwingUtilities;

import util.ExcelTool;

/**
 * Crawls Amazon search results for a keyword, scrapes a fixed set of fields from each
 * product page with regular expressions and hands the collected rows to
 * {@link ExcelTool#writeExcel}.
 *
 * <p>All static helpers keep their state in local variables, so they may be called from
 * several threads at once. Every network operation is retried a bounded number of times;
 * none of the public methods loops forever when a page cannot be fetched or parsed.
 */
public class SendGet extends Thread {

	/** Search-result endpoint queried for every result page. */
	private static final String SEARCH_URL = "https://www.amazon.com/s/ref=sr_pg_2";

	/** Value stored for a field whose pattern did not match the page. */
	private static final String MISSING_VALUE = "error";

	/** Number of attempts for one HTTP request before it is reported as failed. */
	private static final int MAX_REQUEST_ATTEMPTS = 3;

	/** Number of times a page is downloaded again when the expected markup is missing. */
	private static final int MAX_PAGE_ATTEMPTS = 3;

	/** Connect and read timeout applied to every request, in milliseconds. */
	private static final int TIMEOUT_MILLIS = 30000;

	/** Extracts product page links from a search-result page. */
	private static final Pattern PRODUCT_LINK_PATTERN =
			Pattern.compile("id=\"result_\\d+.*?href=\"(https://www.*?)\"");

	/** Product title (group 1). */
	private static final Pattern TITLE_PATTERN =
			Pattern.compile("id=\"productTitle\".*?>(.*?)</span>");

	/** Brand / byline (group 1). */
	private static final Pattern BRAND_PATTERN =
			Pattern.compile("id=\"bylineInfo\".*?>(.*?)</a>");

	/** Price (group 1). */
	private static final Pattern PRICE_PATTERN =
			Pattern.compile("id=\"priceblock_ourprice\".*?>(.*?)</span>");

	/** ASIN cell of the product details table (group 3). */
	private static final Pattern ASIN_PATTERN = Pattern.compile(
			"id=\"productDetails_detailBullets_sections1\".*?>([\\s\\S])*?ASIN([\\s\\S])*?<td.*?>(([\\s\\S])*?)</td>");

	/** Customer review rating text of the product details table (group 4). */
	private static final Pattern REVIEWS_PATTERN = Pattern.compile(
			"id=\"productDetails_detailBullets_sections1\".*?>([\\s\\S])*?Customer Reviews([\\s\\S])*?<td.*?>([\\s\\S])*?<br>(([\\s\\S])*?)</td>");

	/** Number of reviewers (group 1). */
	private static final Pattern REVIEWERS_PATTERN =
			Pattern.compile("id=\"acrCustomerReviewText\".*?>(.*?)</span>");

	/** Best sellers rank of the product details table (group 3). */
	private static final Pattern RANK_PATTERN = Pattern.compile(
			"id=\"productDetails_detailBullets_sections1\".*?>([\\s\\S])*?Best Sellers Rank([\\s\\S])*?<span>(#([\\s\\S])*?)\\(<a([\\s\\S])*?</td>");

	/** "Date First Available" cell of the product details table (group 3). */
	private static final Pattern DATE_PATTERN = Pattern.compile(
			"id=\"productDetails_detailBullets_sections1\".*?>([\\s\\S])*?Date First Available([\\s\\S])*?<td.*?>(([\\s\\S])*?)</td>");

	/** User-Agent values; one is chosen at random for every request. */
	private static final String[] USER_AGENTS = {
			"Mozilla/5.0 (Windows NT 6.1; rv:2.0.1) Gecko/20100101 Firefox/4.0.1",
			"Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1650.16 Safari/537.36",
			"Mozilla/5.0 (Windows NT 6.1; Intel Mac OS X 10.6; rv:7.0.1) Gecko/20100101 Firefox/7.0.1",
			"Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1650.63 Safari/537.36 OPR/18.0.1284.68",
			"Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.0; Trident/4.0)",
			"Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0)",
			"Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.2; Trident/6.0)",
			"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1650.63 Safari/537.36",
			"Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:2.0.1) Gecko/20100101 Firefox/4.0.1",
			"Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:7.0.1) Gecko/20100101 Firefox/7.0.1",
			"Opera/9.80 (Macintosh; Intel Mac OS X 10.9.1) Presto/2.12.388 Version/12.16",
			"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1650.63 Safari/537.36 OPR/18.0.1284.68",
			"Mozilla/5.0 (iPad; CPU OS 7_0 like Mac OS X) AppleWebKit/537.51.1 (KHTML, like Gecko) CriOS/30.0.1599.12 Mobile/11A465 Safari/8536.25",
			"Mozilla/5.0 (iPad; CPU OS 8_0 like Mac OS X) AppleWebKit/600.1.3 (KHTML, like Gecko) Version/8.0 Mobile/12A4345d Safari/600.1.4",
			"Mozilla/5.0 (iPad; CPU OS 7_0_2 like Mac OS X) AppleWebKit/537.51.1 (KHTML, like Gecko) Version/7.0 Mobile/11A501 Safari/9537.53",
			"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36 Edge/16.16299" };

	/**
	 * Searches for {@code keywords}, scrapes up to {@code number} product pages concurrently
	 * and writes the result to {@code path}.
	 *
	 * <p>The search-result pages are fetched on the calling thread. The product pages are
	 * then scraped by a fixed pool of worker threads and this method returns without waiting
	 * for them. The worker that finishes last writes the Excel file and schedules a
	 * confirmation dialog on the Swing event dispatch thread. Pages that fail to download are
	 * skipped; the export contains every row that was collected.
	 *
	 * @param keywords               search keywords (unencoded); must not be {@code null}
	 * @param number                 maximum number of product pages to scrape
	 * @param path                   destination passed to {@code ExcelTool.writeExcel}
	 * @param frame                  parent component of the confirmation dialog
	 * @param startThreadNumberInput number of concurrent workers; values below 1 are treated as 1
	 * @throws InterruptedException declared for compatibility with existing callers; the
	 *                              current implementation does not throw it
	 */
	public void startGetInfoToExcel(String keywords, int number, String path, JFrame frame, int startThreadNumberInput) throws InterruptedException {
		List<String> productUrls = collectProductUrls(keywords, number);
		if (productUrls.isEmpty()) {
			System.err.println("no product links found, nothing to export");
			return;
		}

		int taskCount = productUrls.size();
		// Never create more workers than tasks, and never fewer than one.
		int workerCount = Math.min(Math.max(1, startThreadNumberInput), taskCount);
		ExecutorService executor = Executors.newFixedThreadPool(workerCount);
		List<Map<String, String>> rows =
				Collections.synchronizedList(new ArrayList<Map<String, String>>(taskCount));
		AtomicInteger remaining = new AtomicInteger(taskCount);

		for (String url : productUrls) {
			executor.execute(() -> {
				try {
					rows.add(getPageInfo(url));
				} catch (RuntimeException e) {
					e.printStackTrace();
				} finally {
					// Count the task down even when it failed so the export always happens.
					int left = remaining.decrementAndGet();
					System.err.println((taskCount - left) + "/" + taskCount);
					if (left == 0) {
						exportAndNotify(rows, path, frame);
					}
				}
			});
		}
		// Already submitted tasks keep running; the pool threads end once the queue is drained.
		executor.shutdown();
	}

	/**
	 * Pages through the search results until {@code number} product links were found or a
	 * result page yields no links after {@link #MAX_PAGE_ATTEMPTS} downloads.
	 */
	private static List<String> collectProductUrls(String keywords, int number) {
		List<String> urls = new ArrayList<>();
		if (number <= 0) {
			return urls;
		}
		String encodedKeywords = encodeQueryValue(keywords);
		int page = 1;
		while (urls.size() < number) {
			String param = "page=" + page + "&keywords=" + encodedKeywords;
			List<String> links = Collections.emptyList();
			for (int attempt = 0; attempt < MAX_PAGE_ATTEMPTS && links.isEmpty(); attempt++) {
				links = findGroups(PRODUCT_LINK_PATTERN, sendGet(SEARCH_URL, param), 1);
			}
			if (links.isEmpty()) {
				// End of the result list, or the page could not be retrieved: stop paging.
				break;
			}
			for (String link : links) {
				if (urls.size() == number) {
					break;
				}
				urls.add(link);
				System.err.println(link);
			}
			System.err.println("page=" + page);
			page++;
		}
		return urls;
	}

	/** URL-encodes a query value as UTF-8. */
	private static String encodeQueryValue(String value) {
		try {
			return URLEncoder.encode(value, StandardCharsets.UTF_8.name());
		} catch (UnsupportedEncodingException e) {
			// UTF-8 support is mandatory for every Java platform.
			throw new IllegalStateException("UTF-8 is not supported", e);
		}
	}

	/** Writes a snapshot of the collected rows and shows the confirmation dialog on the EDT. */
	private static void exportAndNotify(List<Map<String, String>> rows, String path, JFrame frame) {
		ExcelTool.writeExcel(new ArrayList<>(rows), path);
		SwingUtilities.invokeLater(() ->
				JOptionPane.showMessageDialog(frame, "导出成功！", "提示：", JOptionPane.INFORMATION_MESSAGE));
	}

	/**
	 * Downloads a product page and extracts its details.
	 *
	 * <p>The page is downloaded up to {@link #MAX_PAGE_ATTEMPTS} times until the product title
	 * is found. Extraction then runs on the last downloaded content.
	 *
	 * @param url URL of the product page
	 * @return a map with the keys {@code title}, {@code grand}, {@code price}, {@code asin},
	 *         {@code reviews}, {@code reviewers}, {@code rank} and {@code date}; a field that
	 *         could not be found has the value {@code "error"}
	 */
	public static Map<String, String> getPageInfo(String url) {
		long startMillis = System.currentTimeMillis();
		String content = "";
		for (int attempt = 0; attempt < MAX_PAGE_ATTEMPTS; attempt++) {
			content = sendGet(url);
			if (TITLE_PATTERN.matcher(content).find()) {
				break;
			}
		}

		Map<String, String> resultMap = new HashMap<>();
		resultMap.put("title", firstMatch(TITLE_PATTERN, content, 1));
		resultMap.put("grand", firstMatch(BRAND_PATTERN, content, 1));
		resultMap.put("price", firstMatch(PRICE_PATTERN, content, 1));
		resultMap.put("asin", firstMatch(ASIN_PATTERN, content, 3));
		resultMap.put("reviews", firstMatch(REVIEWS_PATTERN, content, 4));
		resultMap.put("reviewers", firstMatch(REVIEWERS_PATTERN, content, 1));
		resultMap.put("rank", firstMatch(RANK_PATTERN, content, 3));
		resultMap.put("date", firstMatch(DATE_PATTERN, content, 3));

		System.err.println("getPageInfo：" + (System.currentTimeMillis() - startMillis));
		return resultMap;
	}

	/** Returns the trimmed {@code group} of the first match, or {@code "error"} without a match. */
	private static String firstMatch(Pattern pattern, String content, int group) {
		Matcher matcher = pattern.matcher(content);
		if (!matcher.find()) {
			return MISSING_VALUE;
		}
		String value = matcher.group(group);
		return value == null ? MISSING_VALUE : value.trim();
	}

	/**
	 * Collects capture group 1 of every match of {@code regex} in {@code str}.
	 *
	 * @param regex regular expression with at least one capture group
	 * @param str   text to search; {@code null} yields an empty list
	 * @return group 1 of every match, in order of occurrence
	 */
	public static List<String> getMathString(String regex, String str) {
		return findGroups(Pattern.compile(regex), str, 1);
	}

	/**
	 * Collects the given capture group of every match of {@code regex} in {@code str}.
	 *
	 * @param regex regular expression to search for
	 * @param str   text to search; {@code null} yields an empty list
	 * @param group index of the capture group to return
	 * @return the requested group of every match, in order of occurrence
	 * @throws IndexOutOfBoundsException if the pattern has no group with that index and a
	 *                                   match was found
	 */
	public static List<String> getMathString(String regex, String str, int group) {
		return findGroups(Pattern.compile(regex), str, group);
	}

	/** Shared implementation of the {@code getMathString} overloads; uses a local matcher only. */
	private static List<String> findGroups(Pattern pattern, String str, int group) {
		List<String> found = new ArrayList<>();
		if (str == null) {
			return found;
		}
		Matcher matcher = pattern.matcher(str);
		while (matcher.find()) {
			found.add(matcher.group(group));
		}
		return found;
	}

	/**
	 * Sends a GET request and returns the response body.
	 *
	 * <p>The request is attempted up to {@link #MAX_REQUEST_ATTEMPTS} times. The body is
	 * returned with its line terminators removed, which the patterns of this class rely on.
	 *
	 * @param url   URL to request; may already contain a query string
	 * @param param additional query string in the form {@code name1=value1&name2=value2};
	 *              {@code null} or empty to request {@code url} unchanged
	 * @return the response body, or an empty string when the URL is missing or malformed or
	 *         every attempt failed
	 */
	public static String sendGet(String url, String param) {
		long startMillis = System.currentTimeMillis();
		String body = "";
		if (url != null && !url.isEmpty()) {
			String target = buildRequestUrl(url, param);
			for (int attempt = 1; attempt <= MAX_REQUEST_ATTEMPTS; attempt++) {
				try {
					body = fetch(target);
					break;
				} catch (MalformedURLException e) {
					// Retrying cannot repair a malformed URL.
					System.err.println("发送GET请求出现异常！" + e);
					e.printStackTrace();
					break;
				} catch (IOException e) {
					System.err.println("发送GET请求出现异常！" + e);
					e.printStackTrace();
				}
			}
		}
		System.err.println("sendGet：" + (System.currentTimeMillis() - startMillis));
		return body;
	}

	/**
	 * Sends a GET request to {@code url} without additional parameters.
	 *
	 * @param url URL to request
	 * @return the response body, or an empty string when the request failed
	 * @see #sendGet(String, String)
	 */
	public static String sendGet(String url) {
		return sendGet(url, "");
	}

	/** Appends {@code param} to {@code url} with {@code ?} or {@code &} as appropriate. */
	private static String buildRequestUrl(String url, String param) {
		if (param == null || param.isEmpty()) {
			return url;
		}
		char separator = url.indexOf('?') >= 0 ? '&' : '?';
		return url + separator + param;
	}

	/** Performs a single GET request and returns the body with line terminators removed. */
	private static String fetch(String target) throws IOException {
		URLConnection connection = new URL(target).openConnection();
		connection.setRequestProperty("Accept", "text/html, application/xhtml+xml, image/jxr, *");
		connection.setRequestProperty("Accept-Language", "zh-Hans-CN, zh-Hans; q=0.8, en-US; q=0.5, en; q=0.3");
		connection.setRequestProperty("User-Agent",
				USER_AGENTS[ThreadLocalRandom.current().nextInt(USER_AGENTS.length)]);
		// Timeouts only take effect when they are set before the connection is opened.
		connection.setConnectTimeout(TIMEOUT_MILLIS);
		connection.setReadTimeout(TIMEOUT_MILLIS);
		connection.connect();

		StringBuilder content = new StringBuilder();
		try (BufferedReader reader = new BufferedReader(
				new InputStreamReader(connection.getInputStream(), StandardCharsets.UTF_8))) {
			String line;
			while ((line = reader.readLine()) != null) {
				content.append(line);
			}
		}
		return content.toString();
	}
}
